### Load packages

as.numeric.factor <- function(x) {as.numeric(levels(x))[x]}
library(plyr)
library(haven)
library(ggplot2)
library(ggpubr)
library(dplyr)

### Load the data

data <- read_dta("Prolific.dta")

### Define treatment

data$Treatment4 <- NA

data$ControlTime <- as.numeric(data$ControlTime_PageSubmit)
data$BurdenTime <- as.numeric(data$BurdenTime_PageSubmit)
data$RestrictiveTime <- as.numeric(data$RestrictiveTime_PageSubmit)
data$CombinedTime <- as.numeric(data$CombinedTime_PageSubmit)

data$Duration.Treatment <- NA
data$Duration.Treatment[!is.na(data$ControlTime)] <- data$ControlTime[!is.na(data$ControlTime)]
data$Duration.Treatment[!is.na(data$BurdenTime)] <- data$BurdenTime[!is.na(data$BurdenTime)]
data$Duration.Treatment[!is.na(data$RestrictiveTime)] <- data$RestrictiveTime[!is.na(data$RestrictiveTime)]
data$Duration.Treatment[!is.na(data$CombinedTime)] <- data$CombinedTime[!is.na(data$CombinedTime)]

data$Treatment4[!is.na(data$ControlTime)] <- "Control"
data$Treatment4[!is.na(data$BurdenTime)] <- "Burden"
data$Treatment4[!is.na(data$RestrictiveTime)] <- "Restrictive"
data$Treatment4[!is.na(data$CombinedTime)] <- "Combined"

data$Treatment4 <- factor(data$Treatment4, levels = c("Control", "Burden", "Restrictive", "Combined"))

## Define binary treatment

data$Treatment2 <- ifelse(data$Treatment4 == "Control", 0, 1)

### Define outcomes

## Preference

data$ProImmPref <- as.numeric.factor(as.factor(mapvalues(data$NumbersRelative, from = c("1", "2", "3", "4", ""), to = c(3/3, 2/3, 1/3, 0/3, NA))))

## Beliefs

data$ImmDifficulty <- as.numeric.factor(as.factor(mapvalues(data$Difficulty, from = c("1", "2", "3", "4", ""), to = c(3/3, 2/3, 1/3, 0/3, NA))))
data$ImmComplexity <- as.numeric.factor(as.factor(mapvalues(data$Complexity, from = c("1", "2", "3", "4", ""), to = c(0/3, 1/3, 2/3, 3/3, NA))))
data$ImmBurden <- as.numeric.factor(as.factor(mapvalues(data$Burden, from = c("1", "2", "3", "4", ""), to = c(3/3, 2/3, 1/3, 0/3, NA))))
data$ImmRestrictiveness <- as.numeric.factor(as.factor(mapvalues(data$Restrictiveness, from = c("1", "2", "3", "4", ""), to = c(0/3, 1/3, 2/3, 3/3, NA))))
data$ImmDifficultyIndex <- (data$ImmDifficulty + data$ImmComplexity + data$ImmBurden + data$ImmRestrictiveness)/4

## Pre-treatment knowledge

# Give 0.5 points for a close response
data$Visatime_1 <- as.numeric.factor(as.factor(mapvalues(data$visatime_1, from = c("1", "2", "3", "4", "5"), to = c(0.5, 1, 0.5, 0, 0)))) #87%
data$Visatime_2 <- as.numeric.factor(as.factor(mapvalues(data$visatime_2, from = c("1", "2", "3", "4", "5"), to = c(0, 0.5, 1, 0.5, 0)))) #62%
data$Visatime_3 <- as.numeric.factor(as.factor(mapvalues(data$visatime_3, from = c("1", "2", "3", "4", "5"), to = c(0, 0, 0, 0.5, 1)))) #10%
data$Visatime_4 <- as.numeric.factor(as.factor(mapvalues(data$visatime_4, from = c("1", "2", "3", "4", "5"), to = c(0, 0, 0, 0.5, 1)))) #5%
data$Visatime_5 <- as.numeric.factor(as.factor(mapvalues(data$visatime_5, from = c("1", "2", "3", "4", "5"), to = c(1, 0.5, 0, 0, 0)))) #60%
data$Visatime_6 <- as.numeric.factor(as.factor(mapvalues(data$visatime_6, from = c("1", "2", "3", "4", "5"), to = c(1, 0.5, 0, 0, 0)))) #77%
data$Visatime_7 <- as.numeric.factor(as.factor(mapvalues(data$visatime_7, from = c("1", "2", "3", "4", "5"), to = c(0.5, 1, 0.5, 0, 0)))) #66%
data$visa.know.close <- (data$Visatime_2 + data$Visatime_3 + data$Visatime_4 + data$Visatime_5 + data$Visatime_6 + data$Visatime_7)/6
#46% correct > 20% correct by chance

# Precise scores
data$Visatime_1b <- as.numeric.factor(as.factor(mapvalues(data$visatime_1, from = c("1", "2", "3", "4", "5"), to = c(0, 1, 0, 0, 0)))) #87%
data$Visatime_2b <- as.numeric.factor(as.factor(mapvalues(data$visatime_2, from = c("1", "2", "3", "4", "5"), to = c(0, 0, 1, 0, 0)))) #62%
data$Visatime_3b <- as.numeric.factor(as.factor(mapvalues(data$visatime_3, from = c("1", "2", "3", "4", "5"), to = c(0, 0, 0, 0, 1)))) #10%
data$Visatime_4b <- as.numeric.factor(as.factor(mapvalues(data$visatime_4, from = c("1", "2", "3", "4", "5"), to = c(0, 0, 0, 0, 1)))) #5%
data$Visatime_5b <- as.numeric.factor(as.factor(mapvalues(data$visatime_5, from = c("1", "2", "3", "4", "5"), to = c(1, 0, 0, 0, 0)))) #60%
data$Visatime_6b <- as.numeric.factor(as.factor(mapvalues(data$visatime_6, from = c("1", "2", "3", "4", "5"), to = c(1, 0, 0, 0, 0)))) #77%
data$Visatime_7b <- as.numeric.factor(as.factor(mapvalues(data$visatime_7, from = c("1", "2", "3", "4", "5"), to = c(0, 1, 0, 0, 0)))) #66%
data$visa.know.precise <- (data$Visatime_2b + data$Visatime_3b + data$Visatime_4b + data$Visatime_5b + data$Visatime_6b + data$Visatime_7b)/6
#32% correct is slightly better than 20% correct by chance

### Define other (pre-treatment) covariates

data$partyid <- as.factor(mapvalues(data$Party, from = c("1", "2", "3", "4"), to = c("Democrat", "Republican", "Independent", "Independent")))
data$nonDem <- as.numeric(mapvalues(data$Party, from = c("1", "2", "3", "4"), to = c(0, 1, 1, 1)))
data$nonDem[data$PartyLean == 1] <- 0
data$college2 <- ifelse(data$EdLevel == "7" | data$EdLevel == "8", 1, 0)
data$rich2 <- ifelse(data$Inc == "11" | data$Inc == "12" | data$EdLevel == "14" | data$EdLevel == "15" | data$EdLevel == "16", 1, 0)
data$foreignborn2 <- ifelse(data$USborn == "2", 1, 0)

data$RacialResentment1 <- as.numeric.factor(as.factor(mapvalues(data$rr1, from = c("1", "2", "3", "4", "5"), to = c(4/4, 3/4, 2/4, 1/4, 0/4))))
data$RacialResentment2 <- as.numeric.factor(as.factor(mapvalues(data$rr2, from = c("1", "2", "3", "4", "5"), to = c(0/4, 1/4, 2/4, 3/4, 4/4))))
data$RacialResentment3 <- as.numeric.factor(as.factor(mapvalues(data$rr3, from = c("1", "2", "3", "4", "5"), to = c(0/4, 1/4, 2/4, 3/4, 4/4))))
data$RacialResentment4 <- as.numeric.factor(as.factor(mapvalues(data$rr4, from = c("1", "2", "3", "4", "5"), to = c(4/4, 3/4, 2/4, 1/4, 0/4))))
data$RacialResentmentIndex <- (data$RacialResentment1 + data$RacialResentment2 + data$RacialResentment3 + data$RacialResentment4)/4
data$RacialResentmentIndex2 <- ifelse(data$RacialResentmentIndex >= 0.5, 1, 0)

############
### Analysis
############

# Remove one respondent with no clear treatment assignment
data <- data[!is.na(data$Treatment4),]

# Basic results
summary(lm(ProImmPref ~ Treatment2, data))
summary(lm(ImmDifficultyIndex ~ Treatment2, data))

### Updated Figure 2

# Function to calculate means and confidence intervals (no weights)
calculate_stats <- function(data, outcome, group) {
  stats <- data %>%
    group_by(!!sym(group)) %>%
    summarise(
      mean = mean(!!sym(outcome), na.rm = TRUE),
      variance = var(!!sym(outcome), na.rm = TRUE),
      n = n(),
      .groups = 'drop'
    ) %>%
    mutate(
      se = sqrt(variance / n),
      lower95 = mean - qt(0.975, n - 1) * se,
      upper95 = mean + qt(0.975, n - 1) * se
    )
  return(stats)
}

# Calculate statistics for Treatment4 (Control, Burden, Restrictive, Combined)
stats_treatment4_difficulty <- calculate_stats(data, "ImmDifficultyIndex", "Treatment4")
stats_treatment4_preference <- calculate_stats(data, "ProImmPref", "Treatment4")

# Calculate statistics for Treatment2 (0 = Placebo Control, 1 = All Treatments Combined)
stats_treatment2_combined <- data %>%
  filter(Treatment2 == 1) %>%
  mutate(Treatment4 = "All Treatments Combined")

stats_treatment2_difficulty <- calculate_stats(stats_treatment2_combined, "ImmDifficultyIndex", "Treatment4")
stats_treatment2_preference <- calculate_stats(stats_treatment2_combined, "ProImmPref", "Treatment4")

# Combine the stats from Treatment4 and the "All Treatments Combined" from Treatment2
stats_difficulty <- rbind(
  stats_treatment4_difficulty,
  stats_treatment2_difficulty
)

stats_preference <- rbind(
  stats_treatment4_preference,
  stats_treatment2_preference
)

# Rename for plotting purposes without touching the original data
stats_difficulty$Treatment4 <- factor(stats_difficulty$Treatment4, 
                                      levels = c("Control", "Burden", 
                                                 "Restrictive", "Combined", "All Treatments Combined"))

stats_preference$Treatment4 <- factor(stats_preference$Treatment4, 
                                      levels = c("Control", "Burden", 
                                                 "Restrictive", "Combined", "All Treatments Combined"))

# Re-level Treatment4 so that "Placebo Control" is the reference level for comparisons in Treatment2
data <- data %>%
  mutate(Treatment2 = factor(Treatment2, levels = c(0, 1), labels = c("Placebo Control", "All Treatments Combined")))

# Calculate the linear models for difference-in-means
model_difficulty <- lm(ImmDifficultyIndex ~ Treatment4, data = data)
model_preference <- lm(ProImmPref ~ Treatment4, data = data)

# Extract the difference-in-means (d) and p-values for difficulty (Treatment4 for Burden and Restrictive)
coef_difficulty_burden <- summary(model_difficulty)$coefficients["Treatment4Burden", c("Estimate", "Pr(>|t|)")]
coef_difficulty_restrictive <- summary(model_difficulty)$coefficients["Treatment4Restrictive", c("Estimate", "Pr(>|t|)")]

# Extract the difference-in-means (d) and p-values for the combined treatment from Treatment4
coef_difficulty_combined <- summary(model_difficulty)$coefficients["Treatment4Combined", c("Estimate", "Pr(>|t|)")]

# Extract the difference-in-means (d) and p-values for the "All Treatments Combined" effect from Treatment2
model_combined_difficulty <- lm(ImmDifficultyIndex ~ Treatment2, data = data)
coef_difficulty_all_combined <- summary(model_combined_difficulty)$coefficients["Treatment2All Treatments Combined", c("Estimate", "Pr(>|t|)")]

# Create labels for difficulty (d and p-value), ensuring correct formatting for p-values
label_difficulty_burden <- sprintf("d=%.3f, p=%s", 
                                   coef_difficulty_burden[1], 
                                   format.pval(coef_difficulty_burden[2], eps = 0.01, digits = 2))

label_difficulty_restrictive <- sprintf("d=%.3f, p=%s", 
                                        coef_difficulty_restrictive[1], 
                                        format.pval(coef_difficulty_restrictive[2], eps = 0.001, digits = 2))

label_difficulty_combined <- sprintf("d=%.3f, p=%s", 
                                     coef_difficulty_combined[1], 
                                     format.pval(coef_difficulty_combined[2], eps = 0.001, digits = 2))

label_difficulty_all_combined <- sprintf("d=%.3f, p=%s", 
                                         coef_difficulty_all_combined[1], 
                                         format.pval(coef_difficulty_all_combined[2], eps = 0.001, digits = 2))

# Repeat the same steps for the preference index (ProImmPref)
coef_preference_burden <- summary(model_preference)$coefficients["Treatment4Burden", c("Estimate", "Pr(>|t|)")]
coef_preference_restrictive <- summary(model_preference)$coefficients["Treatment4Restrictive", c("Estimate", "Pr(>|t|)")]

# Extract the difference-in-means (d) and p-values for the combined treatment from Treatment4
coef_preference_combined <- summary(model_preference)$coefficients["Treatment4Combined", c("Estimate", "Pr(>|t|)")]

# Extract the difference-in-means (d) and p-values for the "All Treatments Combined" effect from Treatment2
model_combined_preference <- lm(ProImmPref ~ Treatment2, data = data)
coef_preference_all_combined <- summary(model_combined_preference)$coefficients["Treatment2All Treatments Combined", c("Estimate", "Pr(>|t|)")]

# Create labels for preference (d and p-value), ensuring correct formatting for p-values
label_preference_burden <- sprintf("d=%.3f, p=%s", 
                                   coef_preference_burden[1], 
                                   format.pval(coef_preference_burden[2], eps = 0.01, digits = 2))

label_preference_restrictive <- sprintf("d=%.3f, p=%s", 
                                        coef_preference_restrictive[1], 
                                        format.pval(coef_preference_restrictive[2], eps = 0.01, digits = 2))

label_preference_combined <- sprintf("d=%.3f, p=%s", 
                                     coef_preference_combined[1], 
                                     format.pval(coef_preference_combined[2], eps = 0.01, digits = 2))

label_preference_all_combined <- sprintf("d=%.3f, p=%s", 
                                         coef_preference_all_combined[1], 
                                         format.pval(coef_preference_all_combined[2], eps = 0.01, digits = 2))

# Plot for ImmDifficultyIndex with all treatments and brackets, renaming levels in the plot
p1 <- ggplot(stats_difficulty, aes(x = Treatment4, y = mean)) +
  geom_point() +
  geom_errorbar(aes(ymin = lower95, ymax = upper95), width = 0.15) +
  ylab("Believe Immigration is Difficult (Index)\n") + ylim(0.35, 0.9) +
  theme_minimal() +
  theme(text = element_text(size = 18),
        axis.title.x = element_blank()) +
  scale_x_discrete(labels = c("Control" = "Placebo\nControl", 
                              "Burden" = "Burdensome\nTreatment", 
                              "Restrictive" = "Restrictive\nTreatment", 
                              "Combined" = "Combined\nTreatment", 
                              "All Treatments Combined" = "All Treatments\nCombined")) +  # Renaming for plot
  geom_bracket(
    xmin = "Control", xmax = "Burden", 
    y.position = 0.825, label = label_difficulty_burden, label.size = 4
  ) +
  geom_bracket(
    xmin = "Control", xmax = "Restrictive", 
    y.position = 0.85, label = label_difficulty_restrictive, label.size = 4
  ) +
  geom_bracket(
    xmin = "Control", xmax = "Combined", 
    y.position = 0.875, label = label_difficulty_combined, label.size = 4
  ) +
  geom_bracket(
    xmin = "Control", xmax = "All Treatments Combined", 
    y.position = 0.9, label = label_difficulty_all_combined, label.size = 4
  )

# Plot for ProImmPref with all treatments and brackets, renaming levels in the plot
p2 <- ggplot(stats_preference, aes(x = Treatment4, y = mean)) +
  geom_point() +
  geom_errorbar(aes(ymin = lower95, ymax = upper95), width = 0.15) +
  ylab("Prefer Increasing Immigration\n") + ylim(0.35, 0.9) +
  theme_minimal() +
  theme(text = element_text(size = 18),
        axis.title.x = element_blank()) +
  scale_x_discrete(labels = c("Control" = "Placebo\nControl", 
                              "Burden" = "Burdensome\nTreatment", 
                              "Restrictive" = "Restrictive\nTreatment", 
                              "Combined" = "Combined\nTreatment", 
                              "All Treatments Combined" = "All Treatments\nCombined")) +  # Renaming for plot
  geom_bracket(
    xmin = "Control", xmax = "Burden", 
    y.position = 0.825, label = label_preference_burden, label.size = 4
  ) +
  geom_bracket(
    xmin = "Control", xmax = "Restrictive", 
    y.position = 0.85, label = label_preference_restrictive, label.size = 4
  ) +
  geom_bracket(
    xmin = "Control", xmax = "Combined", 
    y.position = 0.875, label = label_preference_combined, label.size = 4
  ) +
  geom_bracket(
    xmin = "Control", xmax = "All Treatments Combined", 
    y.position = 0.9, label = label_preference_all_combined, label.size = 4
  )

# Combine the plots
combined_figure <- gridExtra::grid.arrange(p1, p2, ncol = 2)

# Save the combined figure
ggsave("Figure2.pdf", plot = combined_figure, width = 14, height = 5.5, dpi = 300)
